#!/usr/bin/env python
# coding: utf-8
import os
from os.path import dirname, exists, expanduser, isdir, join, splitext
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils import Bunch

def load_hic_test(return_X_y=False):
    """Load the test partition of the insurance-charges dataset.

    Reads ``data/insurance.csv`` from the directory containing this module,
    selects the six feature columns and the ``charges`` column, and applies a
    fixed split (``test_size=0.3, random_state=3``). Only the test partition
    of that split is exposed as ``data`` / ``target``; the complete table is
    still available under the ``all`` key of the returned Bunch.

    Parameters
    ----------
    return_X_y : bool, default=False
        If True, return ``(data, target)`` instead of a Bunch, where ``data``
        and ``target`` are the same objects the Bunch would carry under those
        keys.

    Returns
    -------
    bunch : sklearn.utils.Bunch
        Returned when ``return_X_y`` is False, with the keys:

        all : pandas.DataFrame
            The full table as read from the CSV file.
        all_names : pandas.Index
            Column labels of ``all``.
        data : pandas.DataFrame
            Test-partition features (``age``, ``sex``, ``bmi``, ``children``,
            ``smoker``, ``region``).
        feature_names : pandas.Index
            Column labels of ``data``.
        target : pandas.Series
            Test-partition ``charges`` values.
        target_names : list of str
            Single-element list holding the target column name.
        DESCR : str
            Free-text description of the dataset's origin.
        data_filename : str
            Path of the CSV file that was read.

    (data, target) : tuple
        Returned when ``return_X_y`` is True.

    Raises
    ------
    FileNotFoundError
        If ``data/insurance.csv`` does not exist next to this module.
    """
    base_dir = join(dirname(__file__), 'data/')
    data_filename = join(base_dir, 'insurance.csv')

    descr = "Machine Learning with R by Brett Lantz is a book that provides an introduction to machine learning using R. As far as I can tell, Packt Publishing does not make its datasets available online unless you buy the book and create a user account which can be a problem if you are checking the book out from the library or borrowing the book from a friend. All of these datasets are in the public domain but simply needed some cleaning up and recoding to match the format in the book."

    full_table = pd.read_csv(data_filename)

    feature_columns = ['age', 'sex', 'bmi', 'children', 'smoker', 'region']
    features = full_table.loc[:, feature_columns]
    charges = full_table["charges"]

    # The fixed random_state keeps the split reproducible between calls.
    # The training halves are intentionally discarded: this loader only
    # serves the test partition.
    _, test_features, _, test_target = train_test_split(
        features, charges, test_size=0.3, random_state=3
    )

    # Honour the flag, which was previously accepted but silently ignored.
    if return_X_y:
        return test_features, test_target

    return Bunch(all=full_table,
                 all_names=full_table.columns,
                 data=test_features,
                 feature_names=test_features.columns,
                 target=test_target,
                 target_names=[test_target.name],
                 DESCR=descr,
                 data_filename=data_filename)
